Integrate all 4 samples and perform all steps data located at: /ourdisk/hpc/iicomicswshp/dont_archive/omics_workshop_2025/day2/data
library(Seurat)
library(SeuratWrappers)
library(presto)
library(ggplot2)
library(scDblFinder)
library(here)
set.seed(9)
KZ1 <- Read10X(data.dir = here("data/KZ1/filtered_feature_bc_matrix/"))
KZ2 <- Read10X(data.dir = here("data/KZ2/filtered_feature_bc_matrix/"))
KZ3 <- Read10X(data.dir = here("data/KZ3/filtered_feature_bc_matrix/"))
KZ4 <- Read10X(data.dir = here("data/KZ4/filtered_feature_bc_matrix/"))
KZ1 <- CreateSeuratObject( counts = KZ1, project = "KZ1")
KZ2 <- CreateSeuratObject( counts = KZ2, project = "KZ2")
KZ3 <- CreateSeuratObject( counts = KZ3, project = "KZ3")
KZ4 <- CreateSeuratObject( counts = KZ4, project = "KZ4")
## add metadata
KZ1$group <- "CON"
KZ2$group <- "PKD"
KZ3$group <- "CON"
KZ4$group <- "PKD"
sce1 <- as.SingleCellExperiment(KZ1)
Warning: Layer ‘data’ is empty
Warning: Layer ‘scale.data’ is empty
sce2 <- as.SingleCellExperiment(KZ2)
Warning: Layer ‘data’ is empty
Warning: Layer ‘scale.data’ is empty
sce3 <- as.SingleCellExperiment(KZ3)
Warning: Layer ‘data’ is empty
Warning: Layer ‘scale.data’ is empty
sce4 <- as.SingleCellExperiment(KZ4)
Warning: Layer ‘data’ is empty
Warning: Layer ‘scale.data’ is empty
sce1 <- scDblFinder(sce1)
Creating ~7771 artificial doublets...
Dimensional reduction
Evaluating kNN...
Training model...
iter=0, 1980 cells excluded from training.
iter=1, 1983 cells excluded from training.
iter=2, 1904 cells excluded from training.
Threshold found:0.459
921 (9.5%) doublets called
sce2 <- scDblFinder(sce2)
Creating ~7400 artificial doublets...
Dimensional reduction
Evaluating kNN...
Training model...
iter=0, 1658 cells excluded from training.
iter=1, 1699 cells excluded from training.
iter=2, 1654 cells excluded from training.
Threshold found:0.453
807 (8.7%) doublets called
sce3 <- scDblFinder(sce3)
Creating ~7811 artificial doublets...
Dimensional reduction
Evaluating kNN...
Training model...
iter=0, 1791 cells excluded from training.
iter=1, 1855 cells excluded from training.
iter=2, 1822 cells excluded from training.
Threshold found:0.436
938 (9.6%) doublets called
sce4 <- scDblFinder(sce4)
Creating ~8249 artificial doublets...
Dimensional reduction
Evaluating kNN...
Training model...
iter=0, 1746 cells excluded from training.
iter=1, 1792 cells excluded from training.
iter=2, 1775 cells excluded from training.
Threshold found:0.432
1081 (10.5%) doublets called
table(sce1$scDblFinder.class)
singlet doublet
8792 921
table(sce2$scDblFinder.class)
singlet doublet
8442 807
table(sce3$scDblFinder.class)
singlet doublet
8825 938
table(sce4$scDblFinder.class)
singlet doublet
9230 1081
multi.seurat <- merge(x = KZ1, y = c(KZ2, KZ3, KZ4),
add.cell.ids = c("KZ1", "KZ2", "KZ3", "KZ4"), project = "Mouse_Kidney")
# How many cells and genes?
multi.seurat
An object of class Seurat
32285 features across 39036 samples within 1 assay
Active assay: RNA (32285 features, 0 variable features)
4 layers present: counts.KZ1, counts.KZ2, counts.KZ3, counts.KZ4
multi.seurat$doublet <- c(sce1$scDblFinder.class, sce2$scDblFinder.class, sce3$scDblFinder.class, sce4$scDblFinder.class)
multi.seurat <- PercentageFeatureSet(multi.seurat, pattern = "^mt-", col.name = "percent.mt")
multi.seurat <- PercentageFeatureSet(multi.seurat, pattern = "^Rp[s/l]", col.name = "percent.rb")
VlnPlot(multi.seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), ncol = 2)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
ggsave(here("plots/3.integrate/raw_Vln.jpg"), width = 10, height = 10)
FeatureScatter(multi.seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
ggsave(here("plots/3.integrate/raw_countXfeat.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "nCount_RNA", feature2 = "percent.mt")
ggsave(here("plots/3.integrate/raw_countXmito.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "percent.rb", feature2 = "percent.mt")
ggsave(here("plots/3.integrate/raw_riboXmito.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "percent.rb", feature2 = "nFeature_RNA")
ggsave(here("plots/3.integrate/raw_riboXfeat.jpg"), width = 5, height = 5)
multi.seurat[["QC"]] = ifelse(multi.seurat$doublet == "doublet", "doublet", "Pass")
multi.seurat[["QC"]] = ifelse(multi.seurat$nFeature_RNA < 400 & multi.seurat$QC == 'Pass', 'low_Feat', multi.seurat@meta.data$QC)
multi.seurat[["QC"]] = ifelse(multi.seurat$percent.mt > 25 & multi.seurat$QC == 'Pass', 'high_mt', multi.seurat@meta.data$QC)
multi.seurat[["QC"]] = ifelse(multi.seurat$percent.rb < 0 & multi.seurat$QC == 'Pass', "low_rb", multi.seurat@meta.data$QC)
# How many cells are you filtering?
table(multi.seurat$QC)
doublet high_mt low_Feat Pass
3747 5394 3641 26254
VlnPlot(multi.seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), group.by = "QC", ncol = 2)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
ggsave(here("plots/3.integrate/filt_vln.jpg"), width = 10, height = 10)
FeatureScatter(multi.seurat, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "QC")
ggsave(here("plots/3.integrate/filt_countXfeat.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "QC")
ggsave(here("plots/3.integrate/filt_countXmito.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "percent.rb", feature2 = "percent.mt", group.by = "QC")
ggsave(here("plots/3.integrate/filt_riboXmito.jpg"), width = 5, height = 5)
FeatureScatter(multi.seurat, feature1 = "percent.rb", feature2 = "nFeature_RNA", group.by = "QC")
ggsave(here("plots/3.integrate/filt_riboXfeat.jpg"), width = 5, height = 5)
## filter out the cells that passed QC checks
multi.filt <- subset(multi.seurat, subset = QC == "Pass")
multi.filt <- NormalizeData(multi.filt)
Normalizing layer: counts.KZ1
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.KZ2
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.KZ3
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Normalizing layer: counts.KZ4
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
multi.filt<- FindVariableFeatures(multi.filt, nfeatures = 3000)
Finding variable features for layer counts.KZ1
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.KZ2
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.KZ3
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Finding variable features for layer counts.KZ4
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
multi.filt <- ScaleData(multi.filt)
Centering and scaling data matrix
|
| | 0%
|
|================================================ | 33%
|
|================================================================================================= | 67%
|
|=================================================================================================================================================| 100%
## Plot variable features with labels
top10 <- head(x = VariableFeatures(multi.filt), 10)
plot1 <- VariableFeaturePlot(object = multi.filt)
LabelPoints(plot = plot1, points = top10, repel = TRUE)
When using repel, set xnudge and ynudge to 0 for optimal results
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
ggsave(here("plots/3.integrate/HVG.jpg"), width = 5, height = 5)
Warning in scale_x_log10() :
log-10 transformation introduced infinite values.
## PCA
multi.filt <- RunPCA(multi.filt)
PC_ 1
Positive: Esrrg, Ptprd, Kcnj16, Tinag, Ank3, Pkhd1, Pter, Slc25a21, Slc4a4, Fut9
Glis3, Slc47a1, Cyp2j5, Atp6v0a4, Lrp2, Bnc2, Ndrg1, Phyhipl, Bicc1, Trabd2b
Keg1, Abcc2, Pdzk1, Nox4, Chpt1, Myo5b, Slc13a1, Osbpl6, Slc34a1, Cubn
Negative: Ctss, Cd74, Fcer1g, Tyrobp, Ly86, H2-Aa, Spi1, Lsp1, H2-Eb1, Cybb
Crip1, H2-Ab1, Tmsb10, Ifi27l2a, Ctsc, Lst1, Lyz2, Pik3ap1, Rgs10, Pid1
Ifitm3, Hck, Apobec1, Cd300c2, Ms4a6b, March1, Ms4a6c, Plbd1, H2-DMa, Alox5ap
PC_ 2
Positive: Ctss, Tyrobp, Fcer1g, Ly86, Spi1, Cd74, Cybb, H2-Eb1, H2-Aa, Ctsc
H2-Ab1, Lyz2, Cd300c2, Plbd1, Pid1, March1, Lst1, Csf1r, Hck, Apobec1
H2-DMa, Pik3ap1, Rgs10, Fcgr2b, Adgre1, Pld4, Mpeg1, Cd68, Ms4a6c, Wfdc17
Negative: Meis2, Ldb2, Emcn, Plpp1, Ptprb, Ptprm, Egfl7, Ptprg, Pbx1, Flt1
Adgrl4, Plpp3, Prex2, Adgrf5, Eng, Samd12, Sparc, Cyyr1, Fbxl7, Etl4
Dysf, Tek, Cald1, Limch1, Cdh5, Tm4sf1, Epas1, Ly6c1, Fgd5, Nbea
PC_ 3
Positive: Fmo1, Cyp4b1, Abcg2, Meis2, Emcn, Ptprb, Plcb1, Flt1, Ldb2, Plpp1
Slc34a1, Egfl7, Adgrl4, Cyyr1, Cyp2j5, Pakap.1, Keg1, Pter, Lrp2, Fbp1
Alpl, Pdzk1, Eng, Calml4, Sema6a, Kdr, Miox, Pdzd2, Aldob, Acsm2
Negative: Sim1, Slit2, Scnn1b, Naaladl2, Tmem45b, Epcam, Krt7, Rhcg, Mal2, Cdh1
Kcnj1, Scnn1g, Hsd11b2, Sgpp2, Efna5, Defb1, Aif1l, Tspan1, Rgs6, Cldn7
Cldn8, Tfap2b, Tmem213, Tacstd2, Kcnq1, Ehf, Fxyd4, Bmpr1b, Ccser1, Tmem178
PC_ 4
Positive: Cst3, Pid1, Csf1r, C1qc, Adgre1, Fcer1g, C1qb, C1qa, Cd300c2, Lyz2
Tyrobp, Aif1, Psap, Spi1, Ms4a7, Lst1, Fcgr3, Cd68, Mpeg1, Mafb
P2ry6, Trf, H2-DMb1, Apobec1, Cx3cr1, Ifi207, Cxcl16, Lilra5, Adap2, Plbd1
Negative: Skap1, Lck, Ms4a4b, Cd3g, Ptprcap, Trbc2, Itk, Themis, Tnik, Gm2682
Ikzf3, Cd3e, Cd247, Cd3d, Bcl11b, Prkcq, Ltb, Stat4, Trac, Tox
Cd226, Thy1, Camk4, Gimap7, BE692007, Cxcr6, Nkg7, Ripor2, Trbc1, Il7r
PC_ 5
Positive: Gucy1a2, Pdgfrb, Gucy1a1, Lhfp, Cacna2d1, Gpc6, Dgkb, Fhl2, Pde3a, Ror1
Mrvi1, Carmn, Agtr1a, Des, Gucy1b1, Myl9, Ano1, Serping1, Galnt17, Robo1
Tenm3, Daam2, Lama2, Fign, Pdgfra, S1pr3, B130024G19Rik, C1s1, Pawr, Ptn
Negative: Chchd10, Prdx5, S100a1, Ttc36, Pcbd1, Etfb, Cisd1, Rida, Guca2b, Khk
Ldhb, Fxyd2, Dbi, Cbr1, Hint2, Mpc2, Akr7a5, Mdh1, Cyb5a, Acy3
Gstm5, Ddt, Acot1, Gpx1, Sult1d1, Defb29, Hist1h2bc, Acadm, Pdzk1ip1, Cycs
## Check out the PCA analysis
DimPlot(multi.filt, reduction="pca")
ggsave(here("plots/3.integrate/PCA.jpg"), width = 5, height = 5)
ElbowPlot(multi.filt, ndims = 50, reduction = "pca")
ggsave(here("plots/3.integrate/ElbowPlot.jpg"), width = 5, height = 5)
## Clustering and UMAP
multi.filt <- FindNeighbors(multi.filt, dims = 1:30, reduction = "pca")
Computing nearest neighbor graph
Computing SNN
multi.filt <- FindClusters(multi.filt, resolution = 0.4, cluster.name = "unintegrated.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 960633
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9636
Number of communities: 29
Elapsed time: 2 seconds
multi.filt <- RunUMAP(multi.filt, dims = 1:30, reduction = "pca", reduction.name = "umap.unintegrated")
14:36:51 UMAP embedding parameters a = 0.9922 b = 1.112
14:36:51 Read 26254 rows and found 30 numeric columns
14:36:51 Using Annoy for neighbor search, n_neighbors = 30
14:36:51 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:36:53 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e54b751dc
14:36:53 Searching Annoy index using 1 thread, search_k = 3000
14:36:56 Annoy recall = 100%
14:36:57 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:36:57 Initializing from normalized Laplacian + noise (using RSpectra)
14:37:01 Commencing optimization for 200 epochs, with 1116870 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:37:08 Optimization finished
# Check out unintegrated UMAP
DimPlot(multi.filt, reduction = "umap.unintegrated", group.by = "unintegrated.clusters", label=TRUE)
ggsave(here("plots/3.integrate/unintegrated_umap.jpg"), width = 5, height = 5)
DimPlot(multi.filt, reduction = "umap.unintegrated", group.by = "orig.ident", label=FALSE)
ggsave(here("plots/3.integrate/unintegrated_umap_ids.jpg"), width = 5, height = 5)
save(multi.filt, file = here("data/multi.filt.rda"))
# tidy environment before integration test
rm(KZ1, KZ2, KZ3, KZ4, sce1, sce2, sce3, sce4, plot1, multi.seurat, top10)
## CCA
integrated.seurat <- IntegrateLayers(object = multi.filt, method = CCAIntegration, orig.reduction = "pca", new.reduction = "cca") # 2.5 min at 8 GB, 14 min laptop
Finding all pairwise anchors
| | 0 % ~calculating
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 10335 anchors
|+++++++++ | 17% ~07m 57s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 8847 anchors
|+++++++++++++++++ | 33% ~06m 35s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 13607 anchors
|+++++++++++++++++++++++++ | 50% ~04m 56s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 13460 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~03m 10s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 10284 anchors
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01m 34s
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 9356 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09m 26s
Merging dataset 4 into 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 2 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 1 4 into 3 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
#Visualize and cluster
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "cca", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.4, cluster.name = "cca.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 1097557
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9656
Number of communities: 30
Elapsed time: 2 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "cca", dims = 1:30, reduction.name = "umap.cca")
14:49:28 UMAP embedding parameters a = 0.9922 b = 1.112
14:49:28 Read 26254 rows and found 30 numeric columns
14:49:28 Using Annoy for neighbor search, n_neighbors = 30
14:49:28 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:49:30 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e7843dd5b
14:49:30 Searching Annoy index using 1 thread, search_k = 3000
14:49:33 Annoy recall = 100%
14:49:34 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:49:35 Initializing from normalized Laplacian + noise (using RSpectra)
14:49:38 Commencing optimization for 200 epochs, with 1147036 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:49:46 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.cca", group.by = "cca.clusters", label=TRUE)
ggsave(here("plots/3.integrate/umap_CCA.jpg"), width = 5, height = 5)
## RPCA
integrated.seurat <- IntegrateLayers(object = integrated.seurat, method = RPCAIntegration, orig.reduction = "pca", new.reduction = "rpca") # 2.5 min at 8GB, 2 min laptop
Computing within dataset neighborhoods
| | 0 % ~calculating
|+++++++++++++ | 25% ~03s
|+++++++++++++++++++++++++ | 50% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Finding all pairwise anchors
| | 0 % ~calculating
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 6601 anchors
|+++++++++ | 17% ~15s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 5610 anchors
|+++++++++++++++++ | 33% ~12s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 10369 anchors
|+++++++++++++++++++++++++ | 50% ~10s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 8820 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~06s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 5802 anchors
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~03s
Projecting new data onto SVD
Projecting new data onto SVD
Finding neighborhoods
Finding anchors
Found 5083 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=19s
Merging dataset 2 into 3
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 4 into 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 1 4 into 3 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
#Visualize and cluster
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "rpca", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.4, cluster.name = "rpca.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 1035073
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9655
Number of communities: 29
Elapsed time: 2 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "rpca", dims = 1:30, reduction.name = "umap.rpca")
14:50:35 UMAP embedding parameters a = 0.9922 b = 1.112
14:50:35 Read 26254 rows and found 30 numeric columns
14:50:35 Using Annoy for neighbor search, n_neighbors = 30
14:50:35 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:50:37 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e2f79bf0c
14:50:37 Searching Annoy index using 1 thread, search_k = 3000
14:50:40 Annoy recall = 100%
14:50:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:50:42 Initializing from normalized Laplacian + noise (using RSpectra)
14:50:45 Commencing optimization for 200 epochs, with 1130948 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:50:52 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.rpca", group.by = "rpca.clusters", label=TRUE)
ggsave(here("plots/3.integrate/umap_RPCA.jpg"), width = 5, height = 5)
## Harmony
integrated.seurat <- IntegrateLayers(object = integrated.seurat, method = HarmonyIntegration, orig.reduction = "pca", new.reduction = "harmony") # 3 min at 8GB, 1 min laptop
Warning in harmony::HarmonyMatrix(data_mat = Embeddings(object = orig), :
HarmonyMatrix is deprecated and will be removed in the future from the API in the future
Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
This warning is displayed once per session.
Warning: Warning: The parameter tau is deprecated. It will be ignored for this function call and please remove parameter tau in future function calls. Advanced users can set value of parameter tau by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter block.size is deprecated. It will be ignored for this function call and please remove parameter block.size in future function calls. Advanced users can set value of parameter block.size by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.cluster is deprecated. It will be ignored for this function call and please remove parameter max.iter.cluster in future function calls. Advanced users can set value of parameter max.iter.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.cluster is deprecated. It will be ignored for this function call and please remove parameter epsilon.cluster in future function calls. Advanced users can set value of parameter epsilon.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.harmony is deprecated. It will be ignored for this function call and please remove parameter epsilon.harmony in future function calls. If users want to control if harmony would stop early or not, use parameter early_stop. Advanced users can set value of parameter epsilon.harmony by using parameter .options and function harmony_options().
This warning is displayed once per session.
Transposing data matrix
Using automatic lambda estimation
Initializing state using k-means centroids initialization
Harmony 1/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 2/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 3/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony 4/10
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Harmony converged after 4 iterations
#Visualize and cluster
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "harmony", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.4, cluster.name = "harmony.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 979792
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9638
Number of communities: 27
Elapsed time: 2 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "harmony", dims = 1:30, reduction.name = "umap.harmony")
14:51:22 UMAP embedding parameters a = 0.9922 b = 1.112
14:51:22 Read 26254 rows and found 30 numeric columns
14:51:22 Using Annoy for neighbor search, n_neighbors = 30
14:51:22 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:51:23 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e61edc52d
14:51:23 Searching Annoy index using 1 thread, search_k = 3000
14:51:27 Annoy recall = 100%
14:51:27 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:51:28 Initializing from normalized Laplacian + noise (using RSpectra)
14:51:32 Commencing optimization for 200 epochs, with 1126620 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:51:39 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "harmony.clusters", label=TRUE)
ggsave(here("plots/3.integrate/umap_harmony.jpg"), width = 5, height = 5)
## MNN
library(SeuratWrappers)
integrated.seurat <- IntegrateLayers(object = integrated.seurat, method = FastMNNIntegration, orig.reduction = "pca", new.reduction = "mnn") # 1.5 min at 8GB, 30 sec laptop
Converting layers to SingleCellExperiment
Running fastMNN
Warning: Layer counts isn't present in the assay object; returning NULL
#Visualize and cluster
integrated.seurat <- FindNeighbors(integrated.seurat, reduction = "mnn", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
integrated.seurat <- FindClusters(integrated.seurat, resolution = 0.4, cluster.name = "mnn.clusters")
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 26254
Number of edges: 1004889
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9676
Number of communities: 32
Elapsed time: 2 seconds
integrated.seurat <- RunUMAP(integrated.seurat, reduction = "mnn", dims = 1:30, reduction.name = "umap.mnn")
14:51:59 UMAP embedding parameters a = 0.9922 b = 1.112
14:51:59 Read 26254 rows and found 30 numeric columns
14:51:59 Using Annoy for neighbor search, n_neighbors = 30
14:51:59 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:52:00 Writing NN index file to temp file /var/folders/tp/z4kv_q0d5j98lblm0rx44pl40000gr/T//Rtmp3RyVZV/file80e412c3f95
14:52:00 Searching Annoy index using 1 thread, search_k = 3000
14:52:04 Annoy recall = 100%
14:52:04 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
14:52:05 Initializing from normalized Laplacian + noise (using RSpectra)
14:52:09 Commencing optimization for 200 epochs, with 1144032 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:52:17 Optimization finished
DimPlot(integrated.seurat, reduction = "umap.mnn", group.by = "mnn.clusters", label=TRUE)
ggsave(here("plots/3.integrate/umap_MNN.jpg"), width = 5, height = 5)
# which integration method best separates the cell types?
# does the clustering resolution match the umap clusters?
DimPlot(integrated.seurat, reduction = "umap.cca", group.by = "orig.ident", label=FALSE)
DimPlot(integrated.seurat, reduction = "umap.rpca", group.by = "orig.ident", label=FALSE)
DimPlot(integrated.seurat, reduction = "umap.harmony", group.by = "orig.ident", label=FALSE)
DimPlot(integrated.seurat, reduction = "umap.mnn", group.by = "orig.ident", label=FALSE)
save(integrated.seurat, file = here("data/integrated.seurat.rda"))
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: aarch64-apple-darwin20
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Chicago
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] presto_1.0.0 data.table_1.17.0 Rcpp_1.0.14 SeuratWrappers_0.3.5 future_1.49.0
[6] here_1.0.1 scDblFinder_1.18.0 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0
[11] GenomicRanges_1.56.2 GenomeInfoDb_1.40.1 IRanges_2.38.1 S4Vectors_0.42.1 BiocGenerics_0.50.0
[16] MatrixGenerics_1.16.0 matrixStats_1.5.0 ggplot2_3.5.2 Seurat_5.2.1 SeuratObject_5.0.2
[21] sp_2.1-4
loaded via a namespace (and not attached):
[1] spatstat.sparse_3.1-0 bitops_1.0-9 httr_1.4.7 RColorBrewer_1.1-3 tools_4.4.1
[6] sctransform_0.4.1 R6_2.6.1 ResidualMatrix_1.14.1 lazyeval_0.2.2 uwot_0.2.2
[11] withr_3.0.2 gridExtra_2.3 progressr_0.15.1 cli_3.6.5 textshaping_1.0.0
[16] spatstat.explore_3.3-4 fastDummies_1.7.5 labeling_0.4.3 sass_0.4.10 spatstat.data_3.1-4
[21] ggridges_0.5.6 pbapply_1.7-2 Rsamtools_2.20.0 systemfonts_1.2.1 R.utils_2.12.3
[26] harmony_1.2.3 scater_1.32.1 parallelly_1.45.0 limma_3.60.6 rstudioapi_0.17.1
[31] generics_0.1.3 BiocIO_1.14.0 ica_1.0-3 spatstat.random_3.3-2 dplyr_1.1.4
[36] Matrix_1.7-2 ggbeeswarm_0.7.2 abind_1.4-8 R.methodsS3_1.8.2 lifecycle_1.0.4
[41] yaml_2.3.10 edgeR_4.2.2 SparseArray_1.4.8 Rtsne_0.17 grid_4.4.1
[46] promises_1.3.3 dqrng_0.4.1 crayon_1.5.3 miniUI_0.1.1.1 lattice_0.22-6
[51] beachmat_2.20.0 cowplot_1.1.3 pillar_1.10.1 knitr_1.50 metapod_1.12.0
[56] rjson_0.2.23 xgboost_1.7.8.1 future.apply_1.11.3 codetools_0.2-20 glue_1.8.0
[61] spatstat.univar_3.1-1 remotes_2.5.0 vctrs_0.6.5 png_0.1-8 spam_2.11-1
[66] gtable_0.3.6 cachem_1.1.0 xfun_0.52 S4Arrays_1.4.1 mime_0.13
[71] survival_3.8-3 statmod_1.5.0 bluster_1.14.0 fitdistrplus_1.2-2 ROCR_1.0-11
[76] nlme_3.1-167 RcppAnnoy_0.0.22 rprojroot_2.0.4 bslib_0.9.0 irlba_2.3.5.1
[81] vipor_0.4.7 KernSmooth_2.23-26 colorspace_2.1-1 ggrastr_1.0.2 tidyselect_1.2.1
[86] compiler_4.4.1 curl_6.2.2 BiocNeighbors_1.22.0 DelayedArray_0.30.1 plotly_4.10.4
[91] rtracklayer_1.64.0 scales_1.3.0 lmtest_0.9-40 stringr_1.5.1 digest_0.6.37
[96] goftest_1.2-3 spatstat.utils_3.1-2 rmarkdown_2.29 XVector_0.44.0 RhpcBLASctl_0.23-42
[101] htmltools_0.5.8.1 pkgconfig_2.0.3 sparseMatrixStats_1.16.0 fastmap_1.2.0 rlang_1.1.6
[106] htmlwidgets_1.6.4 UCSC.utils_1.0.0 shiny_1.10.0 DelayedMatrixStats_1.26.0 farver_2.1.2
[111] jquerylib_0.1.4 zoo_1.8-12 jsonlite_2.0.0 BiocParallel_1.38.0 R.oo_1.27.0
[116] BiocSingular_1.20.0 RCurl_1.98-1.16 magrittr_2.0.3 scuttle_1.14.0 GenomeInfoDbData_1.2.12
[121] dotCall64_1.2 patchwork_1.3.0 munsell_0.5.1 viridis_0.6.5 reticulate_1.42.0
[126] stringi_1.8.4 zlibbioc_1.50.0 MASS_7.3-64 plyr_1.8.9 parallel_4.4.1
[131] listenv_0.9.1 ggrepel_0.9.6 deldir_2.0-4 Biostrings_2.72.1 splines_4.4.1
[136] tensor_1.5 locfit_1.5-9.10 igraph_2.1.4 spatstat.geom_3.3-5 RcppHNSW_0.6.0
[141] reshape2_1.4.4 ScaledMatrix_1.12.0 XML_3.99-0.18 evaluate_1.0.4 scran_1.32.0
[146] BiocManager_1.30.25 httpuv_1.6.16 batchelor_1.20.0 RANN_2.6.2 tidyr_1.3.1
[151] purrr_1.0.4 polyclip_1.10-7 scattermore_1.2 rsvd_1.0.5 xtable_1.8-4
[156] restfulr_0.0.15 RSpectra_0.16-2 later_1.4.2 viridisLite_0.4.2 ragg_1.3.3
[161] tibble_3.2.1 beeswarm_0.4.0 GenomicAlignments_1.40.0 cluster_2.1.8 globals_0.18.0